# CLEANING NEW RATINGS
# ALAN YAN
# 9-24-2020

#### SETUP ####
# Clear the environment: remove every object that ls() lists in the
# environment this script is evaluated in.
# NOTE(review): when the script is sourced from an interactive session this
# also wipes the caller's workspace -- running in a fresh R session would
# avoid needing it.
rm(list = ls())

#load libraries
library(pacman)
p_load(tidyverse,
       data.table)

# Load the raw ratings file. Strings stay character (not factor) so the
# rating labels can be compared as plain text during cleaning.
dt <- read.csv(
  file = "10-Replicate-Ratings/01-raw-data/silenced-ratings-raw.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

#### CLEANING ####
# Convert the verbal rating labels of the 10 rated items into numeric scores
# between 0 and 1. For every item i this adds `offensive<i>_n` and
# `discourage<i>_n` to `dt`. A response that matches none of the labels
# below (including NA) becomes NA.

# Label -> score lookup tables; labels are matched exactly.
offensive_scale <- c(
  "Non-offensive"        = 0,
  "Slightly offensive"   = .25,
  "Moderately offensive" = .5,
  "Fairly offensive"     = .75,
  "Very offensive"       = 1
)
discourage_scale <- c(
  "Really encourage"                 = 0,
  "Somewhat encourage"               = 1/6,
  "Slightly encourage"               = 1/3,
  "Neither encourage nor discourage" = .5,
  "Slightly discourage"              = 2/3,
  "Somewhat discourage"              = 5/6,
  "Really discourage"                = 1
)

# Map a vector of labels onto the scores in `scale`.
# match() returns NA for unmatched or missing labels, so those stay NA, and
# it always returns one position per input element (unlike logical indexing).
recode_scale <- function(labels, scale) {
  unname(scale[match(labels, names(scale))])
}

# Source column for each item's rating.
# NOTE(review): item 1's offensiveness rating is read from `offense1`, while
# items 2-10 use `offensive<i>`. Kept exactly as the script had it -- confirm
# against the raw CSV header.
offensive_src <- c("offense1", paste0("offensive", 2:10))
discourage_src <- paste0("discourage", 1:10)

# Fail early and name every missing input column at once.
missing_cols <- setdiff(c(offensive_src, discourage_src), names(dt))
if (length(missing_cols) > 0) {
  stop(
    "Rating columns missing from the raw data: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Append the numeric columns in the same order as before:
# offensive1_n, discourage1_n, offensive2_n, discourage2_n, ...
for (i in seq_along(offensive_src)) {
  dt[[paste0("offensive", i, "_n")]] <-
    recode_scale(dt[[offensive_src[i]]], offensive_scale)
  dt[[paste0("discourage", i, "_n")]] <-
    recode_scale(dt[[discourage_src[i]]], discourage_scale)
}

#big question: do we drop NAs and charge ahead with the avgs?
#### *STACK THE TEXTS AND CODINGS ####
# Extract one rated item from `dt` as a three-column table
# (text, offensive, discourage), dropping the item number from the column
# names so that all ten tables can be stacked on top of each other.
stack_item <- function(i) {
  dt %>%
    select(
      text = all_of(paste0("text", i)),
      offensive = all_of(paste0("offensive", i, "_n")),
      discourage = all_of(paste0("discourage", i, "_n"))
    )
}

text1 <- stack_item(1)
text2 <- stack_item(2)
text3 <- stack_item(3)
text4 <- stack_item(4)
text5 <- stack_item(5)
text6 <- stack_item(6)
text7 <- stack_item(7)
text8 <- stack_item(8)
text9 <- stack_item(9)
text10 <- stack_item(10)

# One row per distinct text, with the mean rating rescaled from 0-1 to 0-100.
# NOTE(review): mean() is called without na.rm, so a text with any NA rating
# gets an NA average; the commented-out drop_na() is the alternative raised
# in the question above.
text.clean <- rbind(
  text1, text2, text3, text4, text5,
  text6, text7, text8, text9, text10
) %>%
  #drop_na(offensive, discourage) %>%
  group_by(text) %>%
  summarise(
    offensive_avg = mean(offensive) * 100,
    discourage_avg = mean(discourage) * 100
  )

#### BRINGING IN EXPERIMENT DATA #####
# `exps` supplies the `cleaned` text column used as the join key below;
# `exp1` and `exp2` are the per-experiment data sets.
exps <- read.csv(
  "10-Replicate-Ratings/01-raw-data/clean_data.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
exp1 <- read.csv(
  "01-Experiment-1/data/04-clean-data/clean_data.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
exp2 <- read.csv(
  "02-Experiment-2/data/04-clean-data/clean_data.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

#### *JOINING RATINGS TO CONVERSATION ID ####
# Attach the average ratings to every row of `exps` by matching its `cleaned`
# column against the rated `text`; rows with no match get NA averages.
exps.clean <- exps %>%
  left_join(text.clean, by = c("cleaned" = "text"))
  
#### *CLEAN EXPERIMENT DATA ####
# Bring exp1 and exp2 to a common set of columns, stack them, and attach the
# survey ratings by conversation_id.

#renaming "texter_id" in study 2 to match "texter.id"
# Rename by name so a change in column order cannot silently relabel the
# wrong column; if no `texter_id` column exists, fall back to position 9,
# which is what the script used before.
texter_col <- match("texter_id", names(exp2), nomatch = 9L)
names(exp2)[texter_col] <- "texter.id"

#change texter IDs from study 2 to distinguish between study 1 and study 2 when we merge
max.texter.id <- max(exp1$texter.id)
exp2$texter.id <- exp2$texter.id + max.texter.id

#create a dummy variable to represent fixed effects
exp1$experiment1 <- 1
exp2$experiment1 <- 0

# Columns kept from both experiments, in output order after the id column.
shared_cols <- c(
  "offensive_orig",
  "gender",
  "texter.id",
  "male",
  "female",
  "gen.neutral",
  "no.name",
  "male.instrument",
  "female.instrument",
  "gen.neutral.instrument",
  "no.name.instrument",
  "silenced",
  "responded",
  "experiment1",
  "discouraging.index"
)

# exp1 gets an all-NA discouraging.index so both tables have the same columns.
exp1 <- exp1 %>%
  mutate(discouraging.index = NA) %>%
  rename(offensive_orig = Offensive) %>%
  select(conversation_id, all_of(shared_cols))

# exp2 identifies rows by message_id; expose it as conversation_id to match.
exp2 <- exp2 %>%
  rename(offensive_orig = offensive.index) %>%
  select(conversation_id = message_id, all_of(shared_cols))

# NOTE(review): exps.clean[-2] drops the second column of exps.clean by
# position; which column that is depends on the layout of clean_data.csv --
# confirm and consider dropping it by name.
dt.clean <- rbind(exp1, exp2) %>%
  left_join(exps.clean[-2], by = "conversation_id")

# Replace missing average ratings with fixed values: 0 for offensiveness
# (the score of "Non-offensive") and 50 for discouragement (the score of
# "Neither encourage nor discourage" after the x100 rescaling).
dt.clean <- dt.clean %>%
  mutate(
    offensive_avg = if_else(is.na(offensive_avg), 0, offensive_avg),
    discourage_avg = if_else(is.na(discourage_avg), 50, discourage_avg)
  )

#correlations between online survey respondents and original measures (Table S1)
# Row masks, computed once: rows with responded == 100, and those rows split
# by experiment (experiment1 is 1 for exp1 rows and 0 for exp2 rows).
is_responded <- dt.clean$responded == 100
in_exp1 <- is_responded & dt.clean$experiment1 == 1
in_exp2 <- is_responded & dt.clean$experiment1 == 0

# Offensiveness: survey average vs. original measure, both experiments.
cor(
  dt.clean$offensive_avg[is_responded],
  dt.clean$offensive_orig[is_responded]
)
# Discouragement: survey average vs. original index, experiment 2 rows only.
cor(
  dt.clean$discourage_avg[in_exp2],
  dt.clean$discouraging.index[in_exp2]
)
# Cross-tabulation of survey average against original measure, experiment 1.
table(
  dt.clean$offensive_avg[in_exp1],
  dt.clean$offensive_orig[in_exp1]
)

#### EXPORTING ####
# Write each result twice: as CSV and as an RDS file (no file extension).
# The joined table is exported without its first column, dropped by position.
joined_export <- dt.clean[-1]

write.csv(
  text.clean,
  "10-Replicate-Ratings/02-clean-data/text-ratings-clean.csv"
)
write_rds(
  text.clean,
  "10-Replicate-Ratings/02-clean-data/text-ratings-clean"
)
write.csv(
  joined_export,
  "10-Replicate-Ratings/02-clean-data/texts-joined-ratings-clean.csv"
)
write_rds(
  joined_export,
  "10-Replicate-Ratings/02-clean-data/texts-joined-ratings-clean"
)
